library("hdxstats")
library("dplyr")
library("ggplot2")
library("RColorBrewer")
library("tidyr")
library("pheatmap")
library("scales")
library("viridis")
library("patchwork")
library("Biostrings")
library("xfun")
library("tidyverse")
source("R/test_script_app2.R")
Parse input raw data and output QFeatures object instance given a CSV file path for different test cases.
# First test case
csv_filepath <- "/homes/sanjuan/R/x86_64-pc-linux-gnu-library/4.1/hdxstats/extdata/MBP.csv"
# CASE 1: Parse data, given input data file path + parameter file
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/myparameters.hdxp")
# CASE 2: Parse data, given input data file path + list of parameters
data <- read_csv(csv_filepath)
myparameters <- make_parameter_file(data)
hdx_data <- extract_hdx_data(csv_filepath, parameters = myparameters)
# CASE 3: Parse data, given input data file path. Work out parameters for pre-processing using interactive mode
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/myparameters.hdxp", interactive = TRUE)
# INPUT
data_selection <- hdx_data[,1:24]
all_peptides <- rownames(data_selection)[[1]]
starting_parameters <- list(a = NULL, b = 0.001, d = NULL, p = 1)
# OUTPUT
results <- analyse_kinetics(data = data_selection,
method = "fit",
peptide_selection = all_peptides,
start = starting_parameters)
[1] "INFO: Performing fitting of Deuterium uptake kinetics. Method: 'hdxstats::fitUptakeKinetics' "
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "Could not fit model, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "Could not fit model, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
results$method
[1] "fitUptakeKinetics"
View fitting curves of Deu-uptake kinetics for all available conditions associated to selected peptides
graphics_kinetic <- visualise_hdx_data(results, type="kinetics")
[1] "INFO: I found 104 models in your results data."
graphics_kinetic[[1]] | graphics_kinetic[[2]] | graphics_kinetic[[3]]
View forest plots showing the difference between uptake measurements between conditions, plus the dispersion between their respective fitting model parameters
graphics <- visualise_hdx_data(results, type="forest")
[1] "INFO: I found 104 models in your results data."
Combine graphical outputs in a single canvas
graphics[[1]] | graphics_kinetic[[1]]
Display numerical values as a table
graphics[[1]]$data
# INPUT
data_selection <- hdx_data[,1:100]
all_peptides <- rownames(data_selection)[[1]] # get all peptides
starting_parameters <- list(a = NULL, b = 0.0001, d = NULL, p = 1)
# OUTPUT
results <- analyse_kinetics(data = data_selection,
method = "dfit",
peptide_selection = all_peptides[37],
start = starting_parameters)
[1] "INFO: Performing differential fitting of Deuterium uptake kinetics. Method: 'hdxstats::differentialUptakeKinetics' "
[1] "INFO: You did not specify a 'formula' for your fitting model."
[1] "INFO: Fitting will be performed for (default): 'formula <- value ~ a * (1 - exp(-b*(timepoint)^p)) + d' "
graphics <- visualise_hdx_data(results, type="kinetics")
[1] "INFO: I found 7 models in your results data."
View graphical output for reference peptide shown in LHS Top corner.
graphics
Single-domain antibody (sdAb) binding assays to HOIP.
csv_filepath <- "/homes/sanjuan/R/x86_64-pc-linux-gnu-library/4.1/hdxstats/extdata/N64184_1a2_state.csv"
data <- read_csv(csv_filepath)
Rows: 3600 Columns: 16── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Protein, Sequence, State
dbl (11): Start, End, MaxUptake, MHP, Exposure, Center, Center SD, Uptake, Uptake SD, RT, RT SD
lgl (2): Modification, Fragment
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
make_parameter_file(data, save = "vignettes/data/N64184_1a2_state.hdxp")
Error in make_parameter_file(data, save = "vignettes/data/N64184_1a2_state.hdxp") :
could not find function "make_parameter_file"
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/N64184_1a2_state.hdxp")
[1] "INFO: You gave me a CSV file of your HDX-MSM data"
[1] "INFO: I will pre-process your data parse it using QFeatures ..."
[1] "INFO: You provided a 'parameter_file', I will extract parameters from this to format your output QFeatures data object."
[1] "INFO: You provided a list of 'parameters', I will extract parameters from this to format your output QFeatures data object."
[1] "INFO: Stripped your 'Exposure_Time' values from non-numeric characters."
[1] "INFO: Your original_time_units == 'm'. I will convert your 'Exposure_Time' values to seconds (s)."
[1] "INFO: Your 'Replicate' column appears to be NA. I will add this column with 1 values just to label your data."
[1] "INFO: Your 'Charge' column appears to be NA. I will add this column with 0 values just to label your data."
[1] "INFO: Reformatting your data to a wide format..."
[1] "INFO: Parsing your data as a qDF object class instance. Method: parseDeutData"
[1] "WARNING: Your output data is not normalised."
[1] "WARNING: Your output data was not saved. You can provide an output path with 'save = my_path'"
[1] "INFO: I pre-processed you input CSV data content and now it's available as a QFeatures instance"
# INPUT
data_selection <- hdx_data[,1:33]
all_peptides <- rownames(data_selection)[[1]] # get all peptides
starting_parameters <- list(a = NULL, b = 0.01, d = NULL)
formula = value ~ a * (1 - exp(-b*(timepoint))) + d
# OUTPUT
results <- analyse_kinetics(data = data_selection,
method = "dfit",
peptide_selection = all_peptides[3],
start = starting_parameters,
formula = formula)
[1] "INFO: Performing differential fitting of Deuterium uptake kinetics. Method: 'hdxstats::differentialUptakeKinetics' "
[1] "INFO: You specified your own 'formula' for your fitting model."
graphics <- visualise_hdx_data(results, type="kinetics")
[1] "INFO: I found 11 models in your results data."
custom_colors <- scale_color_manual(values = colorRampPalette(brewer.pal(8, name = "Set2"))(11))
graphics + custom_colors
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Original raw data
#csv_filepath <- "inst/extdata/Project_2_SecA_Cluster_Data.csv"
#data <- read_csv(csv_filepath)
#data$Replicate <- unlist(lapply(strsplit(data$File, split="_"), function(x) tail(x, n=1)))
#write_csv(data, file = "inst/extdata/Project_2_SecA_Cluster_Data_edited.csv")
csv_filepath <- "inst/extdata/Project_2_SecA_Cluster_Data_edited.csv"
Had to pre-process its content to add Replicate column
data <- read_csv(csv_filepath)
Rows: 31855 Columns: 16── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (4): Protein, Sequence, State, File
dbl (10): Start, End, MaxUptake, MHP, Exposure, z, RT, Inten, Center, Replicate
lgl (2): Modification, Fragment
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/Project_2_SecA_Cluster_Data.hdxp")
[1] "INFO: You gave me a CSV file of your HDX-MSM data"
[1] "INFO: I will pre-process your data parse it using QFeatures ..."
[1] "INFO: You provided a 'parameter_file', I will extract parameters from this to format your output QFeatures data object."
[1] "INFO: You provided a list of 'parameters', I will extract parameters from this to format your output QFeatures data object."
[1] "INFO: Stripped your 'Exposure_Time' values from non-numeric characters."
[1] "INFO: Your original_time_units == 'm'. I will convert your 'Exposure_Time' values to seconds (s)."
[1] "INFO: Reformatting your data to a wide format..."
[1] "INFO: Parsing your data as a qDF object class instance. Method: parseDeutData"
Making assay rownames unique.
[1] "WARNING: Your output data is not normalised."
[1] "WARNING: Your output data was not saved. You can provide an output path with 'save = my_path'"
[1] "INFO: I pre-processed you input CSV data content and now it's available as a QFeatures instance"
pheatmap(t(assay(hdx_data)),
cluster_rows = FALSE,
cluster_cols = FALSE,
color = brewer.pal(n = 9, name = "BuPu"),
main = "secA heatmap",
fontsize = 14,
legend_breaks = c(0, 1, 2, 3, 4, 5, 6, max(assay(hdx_data))),
legend_labels = c("0", "1", "2", "3", "4", "5", "6", "Incorporation"))
graphics <- visualise_hdx_data(results, type="kinetics") # READY
graphics <- visualise_hdx_data(results, type="forest") # READY
# NOTE: This only works to compare ONLY TWO distinct conditions, for the same timepoint, for all peptide fragments
# This needs to define a BASELINE state/condition and an ALTERNATE sate/condition
# NOTE: Data must be normalised by intercept
graphics <- visualise_hdx_data(results, type="manhatten", reference = NULL)# <<<<--- NEXT
# NOTE: NOT SURE ??
graphics <- visualise_hdx_data(results, type="epitope", level="peptide", fasta = "my_fasta_path") # Return an EpitopeMap
graphics <- visualise_hdx_data(results, type="epitope", level="residue", fasta = "my_fasta_path") # Return heatmap
graphics <- visualise_hdx_data(results, type="epitope", level="residue", pdb="my_pdb_path") # Return heatmap projected onto PDB
graphics <- visualise_hdx_data(results, type="protection", level="peptide") # Return heatmap for peptide residue-number ranges
graphics <- visualise_hdx_data(results, type="protection", level="residue", fasta = "my_fasta_path") # Return heatmap
graphics <- visualise_hdx_data(results, type="protection", level="residue", pdb="my_pdb_path") # Return heatmap projected onto PDB
# GUI: Make GUI by assembling these building blocks